//
// Project: Disagreement in science: Missing women



clear all
version 15.1  



//
// set locals

// name of the variable used as the female indicator; change it here to switch
// to a different gender-coding variable
local female "female_genderize"

// condition selecting observations whose author gender is known (indicator is
// non-missing); derived from `female' so both locals always refer to the same
// variable
local known_gender "`female'!=."




//
// AER (publication counts use all AEA journals)

// load AER data with gender coding
use "${data}/output/aer_data_gender.dta", clear
drop if month=="May" & year!=2019  // exclude AEA Papers and Proceedings (May issues, except 2019)
drop if year==2020  
keep if (comment | research_article)
append using "${data}/output/all_aea_data_nogender.dta"  // include observations from all AEA journals to capture seniority
keep if journal=="American Economic Review" | type=="JOURNAL ARTICLE" | type=="PAPERS" | type=="REGULAR ARTICLE" | type=="REGULAR ARTICLES" | type=="Regular papers" | type=="SHORTER PAPERS"

// running count of each author's rows, ordered by article_id_chronological
// (1 = first appearance, so the current article is included in the count);
// computed before restricting to the AER so rows from the appended file count too
// NOTE(review): previous_pubs is not used by the regressions in this section
bysort full_name (article_id_chronological): generate previous_pubs = _n
keep if journal=="American Economic Review"  // keep only observations from the AER

// flag the author with the highest author_id within each article as last author
egen last_author_id = max(author_id), by(article_id)
generate last_author = author_id==last_author_id

// identify papers that receive comments (call_by is a string in this dataset)
generate commented = call_by!=""

// analysis
//
// Regress the female indicator on whether the paper received a comment, with
// year indicators and robust standard errors, among non-comment observations
// (comment==0) with known author gender.
//   specification 1: all authors
//   specification 2: last authors only
// Stored per specification i:
//   m`i'            r(table) from regress
//   n`i'            estimation sample size
//   a`i'            r(b) from margins (predictive margins by commented)
//   coefficients`i' "2" (presumably the r(table) column of 1.commented used
//                   by later code; confirm)
local sample1 "1"
local sample2 "last_author==1"
forvalues s = 1/2 {
    local i = `s'
    local coefficients`i' "2"
    regress `female' i.commented i.year if comment==0 & `known_gender' & `sample`s'', vce(robust)
    matrix m`i' = r(table)
    scalar n`i' = e(N)
    margins commented
    matrix a`i' = r(b)
}




//
// ASR

// load ASR data with gender coding
use "${data}/output/asr_data_gender.dta", clear
keep if (comment | research_article)

// running count of each author's rows in the journal, ordered by article_id
// (1 = first appearance, so the current article is included in the count)
// NOTE(review): previous_pubs is not used by the regressions in this section
bysort full_name (article_id): generate previous_pubs = _n

// flag the author with the highest author_id within each article as last author
egen last_author_id = max(author_id), by(article_id)
generate last_author = author_id==last_author_id

// identify papers that receive comments (call_by is numeric in this dataset)
generate commented = call_by!=.

// analysis
//
// Regress the female indicator on whether the paper received a comment, with
// year indicators and robust standard errors, among non-comment observations
// (comment==0) with known author gender.
//   specification 3: all authors
//   specification 4: last authors only
// Stored per specification i:
//   m`i'            r(table) from regress
//   n`i'            estimation sample size
//   a`i'            r(b) from margins (predictive margins by commented)
//   coefficients`i' "2" (presumably the r(table) column of 1.commented used
//                   by later code; confirm)
local sample1 "1"
local sample2 "last_author==1"
forvalues s = 1/2 {
    local i = 2 + `s'
    local coefficients`i' "2"
    regress `female' i.commented i.year if comment==0 & `known_gender' & `sample`s'', vce(robust)
    matrix m`i' = r(table)
    scalar n`i' = e(N)
    margins commented
    matrix a`i' = r(b)
}




//
// Nature

// load Nature data with gender coding
use "${data}/output/nature_data_gender.dta", clear
drop if year==2020
keep if comment | research_article

// running count of each author's rows in the journal, ordered by article_id
// (1 = first appearance, so the current article is included in the count)
// NOTE(review): previous_pubs is not used by the regressions in this section
bysort full_name (article_id): generate previous_pubs = _n

// flag the author with the highest author_id within each article as last author
egen last_author_id = max(author_id), by(article_id)
generate last_author = author_id==last_author_id

// identify papers that receive comments (call_by is numeric in this dataset)
generate commented = call_by!=.

// analysis
//
// Regress the female indicator on whether the paper received a comment, with
// year indicators and robust standard errors, among non-comment observations
// (comment==0) with known author gender.
//   specification 5: all authors
//   specification 6: last authors only
// Stored per specification i:
//   m`i'            r(table) from regress
//   n`i'            estimation sample size
//   a`i'            r(b) from margins (predictive margins by commented)
//   coefficients`i' "2" (presumably the r(table) column of 1.commented used
//                   by later code; confirm)
local sample1 "1"
local sample2 "last_author==1"
forvalues s = 1/2 {
    local i = 4 + `s'
    local coefficients`i' "2"
    regress `female' i.commented i.year if comment==0 & `known_gender' & `sample`s'', vce(robust)
    matrix m`i' = r(table)
    scalar n`i' = e(N)
    margins commented
    matrix a`i' = r(b)
}




//
// PNAS 

// load PNAS data with gender coding
use "${data}/output/pnas_data_gender.dta", clear
drop if full_name=="II" | full_name=="III" | full_name=="IV" | full_name=="Jr" | full_name=="Jr."  // name suffixes erroneously scraped as separate author-article observations
drop if year==2020
keep if comment | research_article

// running count of each author's rows in the journal, ordered by article_id
// (1 = first appearance, so the current article is included in the count);
// computed before dropping early years so pre-2008 rows still count
// NOTE(review): previous_pubs is not used by the regressions in this section
bysort full_name (article_id): generate previous_pubs = _n

// PNAS started comments in 2008: keep only years with comments, after the
// publication count above has used the earlier years
drop if year<2008  

// flag the author with the highest author_id within each article as last author
egen last_author_id = max(author_id), by(article_id)
generate last_author = author_id==last_author_id

// identify papers that receive comments (call_by is numeric in this dataset)
generate commented = call_by!=.

// analysis
//
// Regress the female indicator on whether the paper received a comment, with
// year indicators and robust standard errors, among non-comment observations
// (comment==0) with known author gender.
//   specification 7: all authors
//   specification 8: last authors only
// Stored per specification i:
//   m`i'            r(table) from regress
//   n`i'            estimation sample size
//   a`i'            r(b) from margins (predictive margins by commented)
//   coefficients`i' "2" (presumably the r(table) column of 1.commented used
//                   by later code; confirm)
local sample1 "1"
local sample2 "last_author==1"
forvalues s = 1/2 {
    local i = 6 + `s'
    local coefficients`i' "2"
    regress `female' i.commented i.year if comment==0 & `known_gender' & `sample`s'', vce(robust)
    matrix m`i' = r(table)
    scalar n`i' = e(N)
    margins commented
    matrix a`i' = r(b)
}




//
// Science

// load Science data with gender coding
use "${data}/output/science_data_gender.dta", clear
drop if year==2020
keep if comment | research_article

// running count of each author's rows in the journal, ordered by article_id
// (1 = first appearance, so the current article is included in the count)
// NOTE(review): previous_pubs is not used by the regressions in this section
bysort full_name (article_id): generate previous_pubs = _n

// flag the author with the highest author_id within each article as last author
egen last_author_id = max(author_id), by(article_id)
generate last_author = author_id==last_author_id

// identify papers that receive comments; this dataset uses call_by_1 (numeric)
// rather than call_by as in the other journals
generate commented = call_by_1!=.

// analysis
//
// Regress the female indicator on whether the paper received a comment, with
// year indicators and robust standard errors, among non-comment observations
// (comment==0) with known author gender.
//   specification 9:  all authors
//   specification 10: last authors only
// Stored per specification i:
//   m`i'            r(table) from regress
//   n`i'            estimation sample size
//   a`i'            r(b) from margins (predictive margins by commented)
//   coefficients`i' "2" (presumably the r(table) column of 1.commented used
//                   by later code; confirm)
local sample1 "1"
local sample2 "last_author==1"
forvalues s = 1/2 {
    local i = 8 + `s'
    local coefficients`i' "2"
    regress `female' i.commented i.year if comment==0 & `known_gender' & `sample`s'', vce(robust)
    matrix m`i' = r(table)
    scalar n`i' = e(N)
    margins commented
    matrix a`i' = r(b)
}


